{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# API Call Observability \n",
    "\n",
    "Using the new `instrumentation` package, we can get direct observability into API calls made using LLMs and embedding models.\n",
    "\n",
    "In this notebook, we define a custom event handler and attach it to the root dispatcher so that every LLM and embedding call prints a short summary."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Keep a key that is already exported in the environment. The placeholder is\n",
    "# only used when no key is set, and must then be replaced with a real key.\n",
    "# An `if` is used (rather than a bare `setdefault(...)` expression) so the\n",
    "# key is never echoed as the cell's output.\n",
    "if \"OPENAI_API_KEY\" not in os.environ:\n",
    "    os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Defining an Event Handler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.instrumentation.event_handlers import BaseEventHandler\n",
    "from llama_index.core.instrumentation.events.llm import (\n",
    "    LLMCompletionEndEvent,\n",
    "    LLMChatEndEvent,\n",
    ")\n",
    "from llama_index.core.instrumentation.events.embedding import EmbeddingEndEvent\n",
    "\n",
    "\n",
    "class ModelEventHandler(BaseEventHandler):\n",
    "    \"\"\"Print a short summary of LLM and embedding calls.\n",
    "\n",
    "    Only the end events imported above are reported; any other event\n",
    "    passed to `handle` is ignored.\n",
    "    \"\"\"\n",
    "\n",
    "    @classmethod\n",
    "    def class_name(cls) -> str:\n",
    "        \"\"\"Class name.\"\"\"\n",
    "        return \"ModelEventHandler\"\n",
    "\n",
    "    def handle(self, event) -> None:\n",
    "        \"\"\"Print a summary of `event` if it is a supported end event.\n",
    "\n",
    "        Args:\n",
    "            event: The dispatched event. `LLMCompletionEndEvent`,\n",
    "                `LLMChatEndEvent` and `EmbeddingEndEvent` are reported;\n",
    "                every other event type falls through without output.\n",
    "        \"\"\"\n",
    "        if isinstance(event, LLMCompletionEndEvent):\n",
    "            print(f\"LLM Prompt length: {len(event.prompt)}\")\n",
    "            print(f\"LLM Completion: {str(event.response.text)}\")\n",
    "        elif isinstance(event, LLMChatEndEvent):\n",
    "            # Only the total length of the stringified messages is printed,\n",
    "            # not the messages themselves.\n",
    "            messages_str = \"\\n\".join([str(x) for x in event.messages])\n",
    "            print(f\"LLM Input Messages length: {len(messages_str)}\")\n",
    "            # NOTE(review): `response` appears to be Optional on the chat end\n",
    "            # event in llama-index-core; guard against None so the handler\n",
    "            # prints \"None\" instead of raising.\n",
    "            response = event.response\n",
    "            message = response.message if response is not None else None\n",
    "            print(f\"LLM Response: {str(message)}\")\n",
    "        elif isinstance(event, EmbeddingEndEvent):\n",
    "            print(f\"Embedding {len(event.chunks)} text chunks\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Attaching the Event Handler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.instrumentation import get_dispatcher\n",
    "\n",
    "# Fetch the root dispatcher.\n",
    "root_dispatcher = get_dispatcher()\n",
    "\n",
    "# Build the handler, then attach it to the dispatcher.\n",
    "model_event_handler = ModelEventHandler()\n",
    "root_dispatcher.add_event_handler(model_event_handler)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Invoke the Handler!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Embedding 1 text chunks\n"
     ]
    }
   ],
   "source": [
    "from llama_index.core import Document, VectorStoreIndex\n",
    "\n",
    "# Index the built-in example document.\n",
    "example_document = Document.example()\n",
    "index = VectorStoreIndex.from_documents([example_document])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Embedding 1 text chunks\n",
      "LLM Input Messages length: 1879\n",
      "LLM Response: assistant: LlamaIndex is a \"data framework\" designed to assist in building LLM apps. It offers tools such as data connectors for various data sources, ways to structure data for easy use with LLMs, an advanced retrieval/query interface, and integrations with different application frameworks. It caters to both beginner and advanced users, providing a high-level API for simple data ingestion and querying, as well as lower-level APIs for customization and extension of modules to suit specific requirements.\n"
     ]
    }
   ],
   "source": [
    "# Ask a question through the default (non-streaming) query engine.\n",
    "question = \"Tell me about LLMs?\"\n",
    "query_engine = index.as_query_engine()\n",
    "response = query_engine.query(question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Embedding 1 text chunks\n",
      "LLM Input Messages length: 1890\n",
      "LLM Response: assistant: \n",
      "LLM Input Messages length: 1890\n",
      "LLM Response: assistant: Hello\n",
      "LLM Input Messages length: 1890\n",
      "LLM Response: assistant: Hello world\n",
      "LLM Input Messages length: 1890\n",
      "LLM Response: assistant: Hello world!\n",
      "LLM Input Messages length: 1890\n",
      "LLM Response: assistant: Hello world!\n"
     ]
    }
   ],
   "source": [
    "streaming_prompt = \"Repeat only these two words: Hello world!\"\n",
    "query_engine = index.as_query_engine(streaming=True)\n",
    "response = query_engine.query(streaming_prompt)\n",
    "\n",
    "# Consume the generator without using the chunks; iterating is what drives\n",
    "# the streamed response to completion.\n",
    "for chunk in response.response_gen:\n",
    "    pass"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
